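/*
Defines the program trim_impute: for each variable given, create a copy
(named with a user-supplied suffix) restricted to a year window, impute
zeros for missing values, and set to missing the values that fall outside
centile-based bounds.
*/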

* drop any earlier definition so this file can be re-run; -capture- swallows
* the error raised when the program is not yet defined
capture program drop trim_impute
program define trim_impute 
    * trim_impute varlist, year_lb(#) year_ub(#) suffix(str) centile_ub(#)
    *                      [centile_lb(#)]
    *
    * For every variable Y in varlist, creates a new variable Y<suffix> that:
    *   - equals Y for observations with year in [year_lb, year_ub]
    *     (missing outside that window);
    *   - is 0 where Y is system missing (.) inside the window;
    *   - is reset to missing inside the window when the value lies outside
    *     [lower bound, upper bound], or when the same
    *     (lnr, lfirm, cohort_id) group is flagged in year == cohort_id - 2.
    *
    * Bounds are centiles of Y computed over one observation per
    * (lfirm, year), nonzero values only, inside the year window:
    *   centile_ub : centile used as the upper bound (required)
    *   centile_lb : centile used as the lower bound; 0 (default) means
    *                no lower trimming
    *
    * Requires variables lnr, lfirm, year and cohort_id in memory.
    * Side effect: the data are re-sorted by the -bysort- calls.
    version 18.0
    syntax varlist, ///
        year_lb(int) ///
        year_ub(int) ///
        suffix(string) ///
        centile_ub(real) ///
        [centile_lb(real 0.0)]

    * don't double-count firms when evaluating the distribution:
    * flag a single observation per firm-year
    tempvar flag
    bys lfirm year : gen `flag' = (_n == 1)

    * hold the bounds in scalars rather than locals: a numeric local is
    * stored as text and can be rounded, which may mis-flag observations
    * that sit exactly on a bound
    tempname trim_lb trim_ub

    * loop over all variables given
    foreach Y of varlist `varlist' {
        tempvar flag_trim trim_m2

        local trimY `Y'`suffix'
        gen `trimY' = `Y' if inrange(year, `year_lb', `year_ub')
        * first, impute zeros (note: firm age >= 0 in the sample)
        replace `trimY' = 0 if `Y' == . & inrange(year, `year_lb', `year_ub')

        * calculate upper/lower bounds (based on distribution when var is
        * nonzero)
        centile `Y' if `flag' == 1 & `Y' != 0 & ///
            inrange(year, `year_lb', `year_ub'), centile(`centile_ub')
        scalar `trim_ub' = r(c_1)
        if (`centile_lb' != 0) {
            centile `Y' if `flag' == 1 & `Y' != 0 & ///
                inrange(year, `year_lb', `year_ub'), centile(`centile_lb')
            scalar `trim_lb' = r(c_1)
        }
        else {
            * no lower trimming requested: leave the lower bound open.
            * inrange(z, ., b) is true whenever z <= b, so the zeros imputed
            * above are never trimmed, even when all observed values of
            * `Y' are strictly positive
            scalar `trim_lb' = .
        }

        * if variables are trimmed, remove units where their reference
        * period (year == cohort_id - 2) has been trimmed away
        * NOTE(review): `trimY' is missing outside the year window, so
        * `flag_trim' is 1 there; a unit whose reference year lies outside
        * [year_lb, year_ub] is therefore blanked entirely -- confirm this
        * is intended
        gen `flag_trim' = ///
            !inrange(`trimY', scalar(`trim_lb'), scalar(`trim_ub'))
        bys lnr lfirm cohort_id : ///
            egen `trim_m2' = max(`flag_trim' * (year == cohort_id - 2))
        replace `trimY' = . if (`flag_trim' == 1 | `trim_m2' == 1) & ///
            inrange(year, `year_lb', `year_ub')

    }

end
